###################################
# PREPARE FILES - comparisons with agriculture
###################################
# warning: depending on the treatment and comparison, you may have to adapt the code in places
# questions to bowydenbraber@gmail.com

#read file
# Load the complete table; the agriculture comparison works on its own copy.
cu_base_agri <- read.csv("CU00_COMPLETE.csv")


########## only run when comparing with agricultural landholdings:
cu_prep_agriculture <- cu_base_agri

# Recode DOM_actor step by step; later steps override earlier ones.
# NOTE(review): is_NOPA_incr and is_ALL_present are read from the CSV as loaded
# here; this script only derives is_NOPA_incr further down (on cu_base_agri) --
# confirm the file already carries these columns.

# cat0 is for PAs: rows where is_NOPA_incr is FALSE ...
cu_prep_agriculture$DOM_actor <- with(
  cu_prep_agriculture,
  ifelse(is_NOPA_incr == FALSE, "cat0", as.character(DOM_actor))
)
# ... or where is_ALL_present is TRUE
cu_prep_agriculture$DOM_actor <- with(
  cu_prep_agriculture,
  ifelse(is_ALL_present == TRUE, "cat0", DOM_actor)
)

# cat5 (sparsely populated): control rows with propsettled below 0.1
cu_prep_agriculture$DOM_actor <- with(
  cu_prep_agriculture,
  ifelse(propsettled < 0.1 & control == 1, "cat5", DOM_actor)
)

# Reduce overlaps: drop rows with propsettled above 0.1 and control == 0.
# Rows whose condition evaluates to NA are dropped as well.
cu_prep_agriculture <- cu_prep_agriculture[
  with(cu_prep_agriculture, {
    keep <- !(propsettled > 0.1 & control == 0)
    keep & !is.na(keep)
  }),
  ,
  drop = FALSE
]

# Derive change ("incr") and presence indicators on cu_base_agri, then filter.
# NOTE(review): cu_prep_agriculture was copied from cu_base_agri before these
# columns are added, so they do not propagate to that copy -- confirm intended.

#set the increases
#proportion: p_<k>_incr = p_<k>_10 - p_<k>_00
for (k in c("ALL", "UC_X", "PI", "US_X", "TI")) {
  cu_base_agri[, paste0("p_", k, "_incr")] <-
    cu_base_agri[, paste0("p_", k, "_10")] - cu_base_agri[, paste0("p_", k, "_00")]
}
#by threshold (10%)
for (k in c("UC_X", "PI", "US_X", "TI")) {
  cu_base_agri[, paste0("is_", k, "_incr")] <-
    cu_base_agri[, paste0("p_", k, "_incr")] > 0.1
}
#by threshold (50%)
for (k in c("UC_X", "PI", "US_X", "TI")) {
  cu_base_agri[, paste0("is_", k, "_incr50")] <-
    cu_base_agri[, paste0("p_", k, "_incr")] > 0.5
}
#set non increases: overall change below 1%
cu_base_agri$is_NOPA_incr <- cu_base_agri$p_ALL_incr < 0.01

#set the presences (threshold 1%)
for (k in c("UC_X", "PI", "US_X", "TI")) {
  cu_base_agri[, paste0("is_", k, "_presb")] <-
    cu_base_agri[, paste0("p_", k, "_00")] > 0.01
}

#filter further
# Namespace-qualified so this never resolves to stats::filter when dplyr is not
# attached (matches the dplyr::filter calls used elsewhere in this script).
# NOTE(review): because is_NOPA_incr is defined as p_ALL_incr < 0.01 just above,
# this condition is TRUE for every non-NA row, so it only drops rows where
# p_ALL_incr is NA. Confirm the intended threshold for "slight increases".
cu_base_agri <- dplyr::filter(cu_base_agri, is_NOPA_incr == FALSE | p_ALL_incr < 0.01)
# remove instances where PA is present, but not increasing
# (previously indexed the undefined object `cu_D` instead of cu_base_agri)
cu_base_agri <- cu_base_agri[
  !(cu_base_agri$is_NOPA_incr == TRUE & cu_base_agri$p_ALL_00 > 0.01),
]

#write in between
#write.csv(cu_prep_agriculture,file="CU00_PREPARED_agriculture.csv")

#read file
#cu<-read.csv("CU00_PREPARED_agriculture.csv")
cu <- cu_prep_agriculture

# Control categories available for the agriculture comparison
pacontrols <- c("cat1", "cat2", "cat3", "cat4", "cat5")
#cat1: very small
#cat2: small
#cat3: medium
#cat4: large
#cat5: sparsely populated

#pick control
# `pacontrol` has to be chosen by the user before this point, e.g.
#   pacontrol <- "cat1"
# The former dangling `pacontrol<-` (followed only by a comment) made R take the
# next statement as its right-hand side: `pacontrol` was read before being set
# and then overwritten with the control data frame, which broke the later
# `pacontrol %in% ...` test. Fail early with a clear message instead.
if (!exists("pacontrol") || !is.character(pacontrol) || length(pacontrol) != 1L) {
  stop("set `pacontrol` to a single control name before selecting controls",
       call. = FALSE)
}
# Control rows: those flagged TRUE in the column is_<pacontrol>_incr.
# NOTE(review): verify that this column exists for the cat1..cat5 names above.
cu.ct <- cu[cu[, paste0("is_", pacontrol, "_incr")], ]


#PREPARE COVARIATES 

#covariates for all analyses
# Names of the covariate columns shared by all analyses.
covariates <- c(
  "mine_presabs_before2000_T", "mun.state.num",
  "forest_mean", "tt_standard1",
  "d.pop.00", "area",
  "hhh.inc.avg.00_infl", "p.hhh.lit.00",
  "gini.00", "p.hh.bs.poor.00",
  "slope", "elevation", "flooded"
)


################################Set treatment (for all analyses)

# Edit for each PA type (TI is used here as the example treatment).
cu <- cu %>%
  # keep rows where the other PA types (US_X, PI) show no increase ...
  dplyr::filter(p_US_X_incr < 0.0001, p_PI_incr < 0.0001) %>%
  # ... and no baseline share either
  dplyr::filter(p_US_X_00 < 0.0001, p_PI_00 < 0.0001) %>%
  # drop TI increases above 0.01 that are not flagged by is_TI_incr
  dplyr::filter(!(p_TI_incr > 0.01 & is_TI_incr == FALSE))

# Unless the control is "general" or "mining": drop rows that are not flagged
# by is_TI_incr and have propsettled above 0.01.
if (!(pacontrol %in% c("general", "mining"))) {
  cu <- cu %>%
    dplyr::filter(!(propsettled > 0.01 & is_TI_incr == FALSE))
}



# Treated rows are those flagged TRUE in the is_<patype>_incr* column.
# NOTE(review): `patype` is not defined in this part of the script -- it must be
# set beforehand. The two assignments below are alternatives; run as written,
# the second one (10% threshold) overrides the first.

# robustness test: 50% threshold
cu.tr <- cu[cu[, sprintf("is_%s_incr50", patype)], ]
# main analysis: 10% threshold
cu.tr <- cu[cu[, sprintf("is_%s_incr", patype)], ]

# Expansion after 2006: drop treated rows where is_NOPA_incr is FALSE, the
# change from p_TI_05 to p_TI_10 is below 0.1 and p_TI_05 is already above 0.1.
cu_after2006 <- cu.tr[
  with(cu.tr, !(is_NOPA_incr == FALSE & (p_TI_10 - p_TI_05) < 0.1 & p_TI_05 > 0.1)),
]



######################FINAL PART FOR ALL ANALYSES

# Flag treated (1) and control (0) rows, then stack them into one table.
cu.tr$tr <- 1
cu.ct$tr <- 0
cu.in <- rbind(cu.tr, cu.ct)

# Numeric covariates to standardise.
# The scaling step below needs both `covs_forlm_numeric` (source columns) and
# `covs_forlm_numeric_scaled` (destination columns); the latter was never
# defined because the chained assignment named `covs_forlm_numeric` twice.
# NOTE(review): reconstructed as one chained assignment, i.e. the scaled values
# overwrite the source columns in place -- confirm that separate "*_scaled"
# output columns were not intended.
covs_forlm_numeric_scaled <- covs_forlm_numeric <- c(
  "forest_mean",
  "tt_standard1",
  "d.pop.00",
  "area",
  "hhh.inc.avg.00_infl",
  "p.hhh.lit.00",
  "gini.00",
  "p.hh.bs.poor.00",
  "slope",
  "elevation",
  "flooded",
  "propsettled_scaled"
)

# scale() returns a one-column matrix with attributes; c() flattens it back to
# a plain numeric vector before it is stored in the data frame.
cu.in[covs_forlm_numeric_scaled] <- lapply(
  cu.in[covs_forlm_numeric],
  function(x) c(scale(x))
)

# Write the table used as input for matching.
write.csv(cu.in, "CU00_PREPARED_FOR_MATCHING.csv")
